Take-home Exercise 2

Spatial Point Patterns Analysis of Airbnb Listing in Singapore.

Yu Yiling https://www.linkedin.com/in/yiling-yu/
09-25-2021

Take-home Exercise 2 requirements

Things learned from Take-home Exercise 1

Note!

Data used

1. Install packages

packages = c('maptools', 'sf', 'raster','spatstat', 'tmap','tidyverse')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}

2. Import and transform data

MRT_sf <- st_read(dsn = "data/LTA_TrainStation_Aug2021", layer="MRTLRTStnPtt")
Reading layer `MRTLRTStnPtt' from data source 
  `C:\yiling-yu\IS415_Blog\_posts\2021-09-25-take-home-exercise-2\data\LTA_TrainStation_Aug2021' 
  using driver `ESRI Shapefile'
Simple feature collection with 171 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 6138.311 ymin: 27555.06 xmax: 45254.86 ymax: 47854.2
Projected CRS: SVY21
sg_sf <- st_read(dsn = "data/sg", layer="CostalOutline")
Reading layer `CostalOutline' from data source 
  `C:\yiling-yu\IS415_Blog\_posts\2021-09-25-take-home-exercise-2\data\sg' 
  using driver `ESRI Shapefile'
Simple feature collection with 60 features and 4 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 2663.926 ymin: 16357.98 xmax: 56047.79 ymax: 50244.03
Projected CRS: SVY21
bus_sf <- st_read(dsn = "data/BusStopLocation", layer="BusStop")
Reading layer `BusStop' from data source 
  `C:\yiling-yu\IS415_Blog\_posts\2021-09-25-take-home-exercise-2\data\BusStopLocation' 
  using driver `ESRI Shapefile'
Simple feature collection with 5156 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 4427.938 ymin: 26482.1 xmax: 48282.5 ymax: 52983.82
Projected CRS: SVY21
listings21 <- read_csv("data/Airbnb_listings/29062021.csv")
listings19 <- read_csv("data/Airbnb_listings/30062019.csv")

hotels <- read_csv("data/OneMap_Data/hotels.csv")
tourism <- read_csv("data/OneMap_Data/tourism.csv")
hawker_sf <- st_read("data/hawker_centres/hawker-centres-geojson.geojson") %>%
  st_transform(crs = 3414)
Reading layer `hawker-centres-geojson' from data source 
  `C:\yiling-yu\IS415_Blog\_posts\2021-09-25-take-home-exercise-2\data\hawker_centres\hawker-centres-geojson.geojson' 
  using driver `GeoJSON'
Simple feature collection with 125 features and 2 fields
Geometry type: POINT
Dimension:     XYZ
Bounding box:  xmin: 103.6974 ymin: 1.272716 xmax: 103.9882 ymax: 1.449217
z_range:       zmin: 0 zmax: 0
Geodetic CRS:  WGS 84
listings21_sf <- st_as_sf(listings21, 
                       coords = c("longitude", "latitude"),
                       crs=4326) %>%
  st_transform(crs = 3414)

listings19_sf <- st_as_sf(listings19, 
                       coords = c("longitude", "latitude"),
                       crs=4326) %>%
  st_transform(crs = 3414)

hotels_sf <- st_as_sf(hotels, 
                       coords = c("Lng", "Lat"),
                       crs=4326) %>%
  st_transform(crs = 3414)

tourism_sf <- st_as_sf(tourism, 
                       coords = c("Lng", "Lat"),
                       crs=4326) %>%
  st_transform(crs = 3414)

MRT_sf <- st_set_crs(MRT_sf, 3414)
sg_sf <- st_set_crs(sg_sf, 3414)
bus_sf <- st_set_crs(bus_sf, 3414)

Have a glance at the point maps

tmap_mode("view")
tmap_options(check.and.fix = TRUE)

tm_shape(sg_sf) +
  tm_polygons() +
tm_shape(listings19_sf) +
  tm_dots(alpha=0.4,
          col="blue",
          size=0.05) +
tm_shape(listings21_sf) +
  tm_dots(alpha=0.4,
          col="red",
          size=0.05)
tmap_mode("plot")

3. Geospatial data wrangling

a. Converting sf data frames to Spatial class

MRT <- as_Spatial(MRT_sf)
listings21 <- as_Spatial(listings21_sf)
listings19 <- as_Spatial(listings19_sf)
hotels <- as_Spatial(hotels_sf)
tourism <- as_Spatial(tourism_sf)
sg <- as_Spatial(st_zm(sg_sf))
hawker <- as_Spatial(hawker_sf)
bus <- as_Spatial(bus_sf)

b. Converting the Spatial class into generic sp object

MRT_sp <- as(MRT, "SpatialPoints")
listings21_sp <- as(listings21, "SpatialPoints")
listings19_sp <- as(listings19, "SpatialPoints")
hotels_sp <- as(hotels, "SpatialPoints")
tourism_sp <- as(tourism, "SpatialPoints")
sg_sp <- as(sg, "SpatialPolygons")
hawker_sp <- as(hawker, "SpatialPoints")
bus_sp <- as(bus, "SpatialPoints")

c. Converting the generic sp object into spatstat’s ppp object

MRT_ppp <- as(MRT_sp, "ppp")
listings21_ppp <- as(listings21_sp, "ppp")
listings19_ppp <- as(listings19_sp, "ppp")
hotels_ppp <- as(hotels_sp, "ppp")
tourism_ppp <- as(tourism_sp, "ppp")
hawker_ppp <- as(hawker_sp, "ppp")
bus_ppp <- as(bus_sp, "ppp")
summary(MRT_ppp)
Planar point pattern:  171 points
Average intensity 2.153565e-07 points per square unit

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [6138.31, 45254.86] x [27555.06, 47854.2] units
                    (39120 x 20300 units)
Window area = 794032000 square units
summary(listings21_ppp)
Planar point pattern:  4238 points
Average intensity 5.114513e-06 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [7406.99, 43337.89] x [25330, 48391.55] units
                    (35930 x 23060 units)
Window area = 828622000 square units
summary(listings19_ppp)
Planar point pattern:  8293 points
Average intensity 9.345289e-06 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [7215.57, 44098.31] x [25166.35, 49226.35] units
                    (36880 x 24060 units)
Window area = 887399000 square units
summary(hotels_ppp)
Planar point pattern:  422 points
Average intensity 5.58414e-07 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [5939.24, 45334.18] x [25379.44, 44562.4] units
                    (39390 x 19180 units)
Window area = 755712000 square units
summary(tourism_ppp)
Planar point pattern:  107 points
Average intensity 3.947904e-13 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 2 decimal places
i.e. rounded to the nearest multiple of 0.01 units

Window: rectangle = [-13597396, 43660] x [22869, 19891558] units
                    (13640000 x 19870000 units)
Window area = 2.7103e+14 square units
summary(hawker_ppp)
Planar point pattern:  125 points
Average intensity 1.978798e-07 points per square unit

Coordinates are given to 2 decimal places
i.e. rounded to the nearest multiple of 0.01 units

Window: rectangle = [12874.19, 45241.4] x [28355.97, 47872.53] units
                    (32370 x 19520 units)
Window area = 631697000 square units
summary(bus_ppp)
Planar point pattern:  5156 points
Average intensity 4.436334e-06 points per square unit

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [4427.94, 48282.5] x [26482.1, 52983.82] units
                    (43850 x 26500 units)
Window area = 1162220000 square units

d. Handling duplicate points

listings21_ppp_jit <- rjitter(listings21_ppp, 
                             retry=TRUE, 
                             nsim=1, 
                             drop=TRUE)

any(duplicated(listings21_ppp_jit))
[1] FALSE
listings19_ppp_jit <- rjitter(listings19_ppp, 
                             retry=TRUE, 
                             nsim=1, 
                             drop=TRUE)

any(duplicated(listings19_ppp_jit))
[1] FALSE
hotels_ppp_jit <- rjitter(hotels_ppp, 
                             retry=TRUE, 
                             nsim=1, 
                             drop=TRUE)

any(duplicated(hotels_ppp_jit))
[1] FALSE
tourism_ppp_jit <- rjitter(tourism_ppp, 
                             retry=TRUE, 
                             nsim=1, 
                             drop=TRUE)

any(duplicated(tourism_ppp_jit))
[1] FALSE
any(duplicated(MRT_ppp))
[1] FALSE
any(duplicated(hawker_ppp))
[1] FALSE
any(duplicated(bus_ppp))
[1] FALSE

e. Creating owin object

covert sg spatialpolygon object into owin object of spatstat

sg_owin <- as(sg_sp, "owin")
plot(sg_owin)

f. Combining point events object and owin object

listings21SG_ppp = listings21_ppp_jit[sg_owin]
listings19SG_ppp = listings19_ppp_jit[sg_owin]
hotelsSG_ppp = hotels_ppp_jit[sg_owin]
tourismSG_ppp = tourism_ppp_jit[sg_owin]
MRTSG_ppp = MRT_ppp[sg_owin]
hawkerSG_ppp = hawker_ppp[sg_owin]
busSG_ppp = bus_ppp[sg_owin]
par(mar=c(1,1,1,1))
par(mfrow=c(4,2))
plot(listings21SG_ppp)
plot(listings19SG_ppp)
plot(hotelsSG_ppp)
plot(tourismSG_ppp)
plot(MRTSG_ppp)
plot(hawkerSG_ppp)
plot(busSG_ppp)

4. Section A: Airbnb Distribution in 2019

4.1 First-order Spatial Point Patterns Analysis: A. deriving kernel density estimation (KDE) layer for visualising and exploring the intensity of point processes

a. Computing kernel density estimation using automatic bandwidth selection method

Using sigma = bw.diggle

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
listings19SG_ppp.km <- rescale(listings19SG_ppp, 1000, "km")
#plot
kde_listings19SG_bw <- density(listings19SG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian") 

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
hotelsSG_ppp.km <- rescale(hotelsSG_ppp, 1000, "km")
#plot
kde_hotelsSG_bw <- density(hotelsSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian") 

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
tourismSG_ppp.km <- rescale(tourismSG_ppp, 1000, "km")
#plot
kde_tourismSG_bw <- density(tourismSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian") 

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
MRTSG_ppp.km <- rescale(MRTSG_ppp, 1000, "km")
#plot
kde_MRTSG_bw <- density(MRTSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian")

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
hawkerSG_ppp.km <- rescale(hawkerSG_ppp, 1000, "km")
#plot
kde_hawkerSG_bw <- density(hawkerSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian")

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
busSG_ppp.km <- rescale(busSG_ppp, 1000, "km")
#plot
kde_busSG_bw <- density(busSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian")

par(mar=c(1,1,1,1))
par(mfrow=c(3,2))
plot(kde_listings19SG_bw, main = "listings19")
plot(kde_hotelsSG_bw, main = "hotels")
plot(kde_tourismSG_bw, main = "tourism")
plot(kde_MRTSG_bw, main = "MRT")
plot(kde_hawkerSG_bw, main = "hawker")
plot(kde_busSG_bw, main = "bus")

RETRIEVE THE BANDWIDTH USED TO COMPUTE THE KDE LAYER

cat('bandwidth for listings19: ', bw.diggle(listings19SG_ppp.km), '\n')
cat('bandwidth for hotels: ', bw.diggle(hotelsSG_ppp.km), '\n')
cat('bandwidth for tourism: ', bw.diggle(tourismSG_ppp.km), '\n')
cat('bandwidth for MRT: ', bw.diggle(MRTSG_ppp.km), '\n')
cat('bandwidth for hawker: ', bw.diggle(hawkerSG_ppp.km), '\n')
cat('bandwidth for bus: ', bw.diggle(busSG_ppp.km), '\n')

Let’s try to look at the bandwidths returned by these two methods first

#let's see some examples
bw.diggle(listings19SG_ppp.km)
     sigma 
0.04196627 
bw.ppl(listings19SG_ppp.km)
    sigma 
0.1847274 
bw.diggle(tourismSG_ppp.km)
   sigma 
2.113734 
bw.ppl(tourismSG_ppp.km)
   sigma 
17.54253 
bw.diggle(busSG_ppp.km)
    sigma 
0.3927249 
bw.ppl(busSG_ppp.km)
    sigma 
0.1104879 

Using sigma = bw.ppl

kde_listings19SG_ppl <- density(listings19SG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian") 

kde_hotelsSG_ppl <- density(hotelsSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian") 

kde_tourismSG_ppl <- density(tourismSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian") 

kde_MRTSG_ppl <- density(MRTSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian")

kde_hawkerSG_ppl <- density(hawkerSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian")

kde_busSG_ppl <- density(busSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian")

plot and compare bw.diggle and bw.ppl

par(mar=c(1,1,1,1))
par(mfrow=c(6,2))
plot(kde_listings19SG_bw, main = "listings19 diggle")
plot(kde_listings19SG_ppl, main = "listings19 ppl")
plot(kde_hotelsSG_bw, main = "hotels diggle")
plot(kde_hotelsSG_ppl, main = "hotels ppl")
plot(kde_tourismSG_bw, main = "tourism diggle")
plot(kde_tourismSG_ppl, main = "tourism ppl")
plot(kde_MRTSG_bw, main = "MRT diggle")
plot(kde_MRTSG_ppl, main = "MRT ppl")
plot(kde_hawkerSG_bw, main = "hawker diggle")
plot(kde_hawkerSG_ppl, main = "hawker ppl")
plot(kde_busSG_bw, main = "bus diggle")
plot(kde_busSG_ppl, main = "bus ppl")

b. Computing kernel density estimation using fixed/adaptive bandwidth method

using fixed bandwidth

#compute a KDE layer by defining a bandwidth of 1000 meter. Notice the sigma value used is 1. This is because the unit of measurement of childcareSG_ppp.km object is in kilometer, hence the 1000m is 1km.

kde_listings19SG_bw_1 <- density(listings19SG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian") 

kde_hotelsSG_bw_1 <- density(hotelsSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian") 

kde_tourismSG_bw_1 <- density(tourismSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian") 

kde_MRTSG_bw_1 <- density(MRTSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian")

kde_hawkerSG_bw_1 <- density(hawkerSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian")

kde_busSG_bw_1 <- density(busSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian")

using adaptive bandwidth

kde_listings19SG_adaptive <- adaptive.density(listings19SG_ppp.km, method="kernel")

kde_hotelsSG_adaptive <- adaptive.density(hotelsSG_ppp.km, method="kernel")

kde_tourismSG_adaptive <- adaptive.density(tourismSG_ppp.km, method="kernel")

kde_MRTSG_adaptive <- adaptive.density(MRTSG_ppp.km, method="kernel")

kde_hawkerSG_adaptive <- adaptive.density(hawkerSG_ppp.km, method="kernel")

kde_busSG_adaptive <- adaptive.density(busSG_ppp.km, method="kernel")

plot and compare fixed and adaptive bandwidth

par(mar=c(1,1,1,1))
par(mfrow=c(6,2))
plot(kde_listings19SG_bw_1, main = "listings19 fixed")
plot(kde_listings19SG_adaptive, main = "listings19 adaptive")
plot(kde_hotelsSG_bw_1, main = "hotels fixed")
plot(kde_hotelsSG_adaptive, main = "hotels adaptive")
plot(kde_tourismSG_bw_1, main = "tourism fixed")
plot(kde_tourismSG_adaptive, main = "tourism adaptive")
plot(kde_MRTSG_bw_1, main = "MRT fixed")
plot(kde_MRTSG_adaptive, main = "MRT adaptive")
plot(kde_hawkerSG_bw_1, main = "hawker fixed")
plot(kde_hawkerSG_adaptive, main = "hawker adaptive")
plot(kde_busSG_bw_1, main = "bus fixed")
plot(kde_busSG_adaptive, main = "bus adaptive")

c. Convert KDE output into raster and display the kernel density maps on openstreetmap of Singapore

# convert KDE output into grid
gridded_kde_listings19SG_bw <- as.SpatialGridDataFrame.im(kde_listings19SG_bw)
gridded_kde_hotelsSG_bw <- as.SpatialGridDataFrame.im(kde_hotelsSG_bw)
gridded_kde_tourismSG_bw <- as.SpatialGridDataFrame.im(kde_tourismSG_bw)
gridded_kde_MRTSG_bw <- as.SpatialGridDataFrame.im(kde_MRTSG_bw)
gridded_kde_hawkerSG_bw <- as.SpatialGridDataFrame.im(kde_hawkerSG_bw)
gridded_kde_busSG_bw <- as.SpatialGridDataFrame.im(kde_busSG_bw)

# convert gridded output into raster
kde_listings19SG_bw_raster <- raster(gridded_kde_listings19SG_bw)
kde_hotelsSG_bw_raster <- raster(gridded_kde_hotelsSG_bw)
kde_tourismSG_bw_raster <- raster(gridded_kde_tourismSG_bw)
kde_MRTSG_bw_raster <- raster(gridded_kde_MRTSG_bw)
kde_hawkerSG_bw_raster <- raster(gridded_kde_hawkerSG_bw)
kde_busSG_bw_raster <- raster(gridded_kde_busSG_bw)

# assign projection system to raster
projection(kde_listings19SG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_hotelsSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_tourismSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_MRTSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_hawkerSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_busSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")

# visualise the raster kernel density maps on openstreetmap of Singapore
tmap_mode("view")
tmap_options(check.and.fix = TRUE)

listings19SG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings19SG_bw_raster) + 
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "listings19SG") +
  tm_basemap('OpenStreetMap')

hotelsSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_hotelsSG_bw_raster) +
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "hotelsSG") +
  tm_basemap('OpenStreetMap')

tourismSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_tourismSG_bw_raster) +
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "tourismSG") +
  tm_basemap('OpenStreetMap')

MRTSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_MRTSG_bw_raster) +
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "MRTSG") +
  tm_basemap('OpenStreetMap')

hawkerSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_hawkerSG_bw_raster) +
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "hawkerSG") +
  tm_basemap('OpenStreetMap')

busSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_busSG_bw_raster) + 
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "busSG") +
  tm_basemap('OpenStreetMap')

tmap_arrange(listings19SG_osmap, hotelsSG_osmap, tourismSG_osmap, MRTSG_osmap, hawkerSG_osmap, busSG_osmap, asp=2, ncol=2)
tmap_mode('plot')

4.2 Second-order Spatial Point Patterns Analysis: Cross K-Function and Cross L-Function

a. Prepare merged ppp objects by using superimpose()

listings19_vs_hotels<- superimpose('listings19'=listings19SG_ppp.km, 'hotels'=hotelsSG_ppp.km)

listings19_vs_tourism<- superimpose('listings19'=listings19SG_ppp.km, 'tourism'=tourismSG_ppp.km)

listings19_vs_MRT<- superimpose('listings19'=listings19SG_ppp.km, 'MRT'=MRTSG_ppp.km)

listings19_vs_hawker<- superimpose('listings19'=listings19SG_ppp.km, 'hawker'=hawkerSG_ppp.km)

listings19_vs_bus<- superimpose('listings19'=listings19SG_ppp.km, 'bus'=busSG_ppp.km)

b. Second-order Multi-tpye Point Patterns Analysis: Cross K-Function

Note:

For each comparison between the 2019 Airbnb listings and one of the factors, I’m going to plot the Kcross and perform a Complete Spatial Randomness(CSR) testing on the Cross K-Function.

The hypothesis are as follows: + H0 = The distribution of 2019 Airbnb listings and the factor are spatially independent. + H1 = The distribution of 2019 Airbnb listings and the factor are NOT spatially independent. + The null hypothesis will be rejected if p-value is smaller than alpha value of 0.01 (i.e. at 99% confident interval).

i. listings19 vs hotels

listings19_vs_hotels_Kcross <- Kcross(listings19_vs_hotels, 
                           i="listings19", j="hotels",
                           correction='border')
plot(listings19_vs_hotels_Kcross)

listings19_vs_hotels_Kcross.csr <- envelope(listings19_vs_hotels, Kcross, i="listings19", j="hotels", correction='border', nsim=99)

plot(listings19_vs_hotels_Kcross.csr, xlab="distance(km)", xlim=c(0,10))

ii. listings19 vs tourism

listings19_vs_tourism_Kcross <- Kcross(listings19_vs_tourism, 
                           i="listings19", j="tourism",
                           correction='border')
plot(listings19_vs_tourism_Kcross)

listings19_vs_tourism_Kcross.csr <- envelope(listings19_vs_tourism, Kcross, i="listings19", j="tourism", correction='border', nsim=99)

plot(listings19_vs_tourism_Kcross.csr, xlab="distance(km)", xlim=c(0,10))

iii. listings19 vs MRT

listings19_vs_MRT_Kcross <- Kcross(listings19_vs_MRT, 
                           i="listings19", j="MRT",
                           correction='border')
plot(listings19_vs_MRT_Kcross)

listings19_vs_MRT_Kcross.csr <- envelope(listings19_vs_MRT, Kcross, i="listings19", j="MRT", correction='border', nsim=99)

plot(listings19_vs_MRT_Kcross.csr, xlab="distance(m)", xlim=c(0,10))

iv. listings19 vs hawker

listings19_vs_hawker_Kcross <- Kcross(listings19_vs_hawker, 
                           i="listings19", j="hawker",
                           correction='border')
plot(listings19_vs_hawker_Kcross)

listings19_vs_hawker_Kcross.csr <- envelope(listings19_vs_hawker, Kcross, i="listings19", j="hawker", correction='border', nsim=99)

plot(listings19_vs_hawker_Kcross.csr, xlab="distance(m)", xlim=c(0,10))

v. listings19 vs bus

listings19_vs_bus_Kcross <- Kcross(listings19_vs_bus, 
                           i="listings19", j="bus",
                           correction='border')
plot(listings19_vs_bus_Kcross)

listings19_vs_bus_Kcross.csr <- envelope(listings19_vs_bus, Kcross, i="listings19", j="bus", correction='border', nsim=99)

plot(listings19_vs_bus_Kcross.csr, xlab="distance(m)", xlim=c(0,10))

c. Second-order Multi-tpye Point Patterns Analysis: Cross L-Function

Note:

Cross L-Function and Cross K-Function are the actually the same but just Cross L-Function is the standardized version of Cross K-Function. Their CSR testing hypothesis, analysis and conclusions are all the same. I would just like to draw here the L-Functions for better visualization purpose.

i. listings19 vs hotels

listings19_vs_hotels_Lcross <- Lcross(listings19_vs_hotels, 
                           i="listings19", j="hotels",
                           correction='border')
plot(listings19_vs_hotels_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_hotels_Lcross.csr <- envelope(listings19_vs_hotels, Lcross, i="listings19", j="hotels", correction='border', nsim=99)

plot(listings19_vs_hotels_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

ii. listings19 vs tourism

listings19_vs_tourism_Lcross <- Lcross(listings19_vs_tourism, 
                           i="listings19", j="tourism",
                           correction='border')
plot(listings19_vs_tourism_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_tourism_Lcross.csr <- envelope(listings19_vs_tourism, Lcross, i="listings19", j="tourism", correction='border', nsim=99)

plot(listings19_vs_tourism_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

iii. listings19 vs MRT

listings19_vs_MRT_Lcross <- Lcross(listings19_vs_MRT, 
                           i="listings19", j="MRT",
                           correction='border')
plot(listings19_vs_MRT_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_MRT_Lcross.csr <- envelope(listings19_vs_MRT, Lcross, i="listings19", j="MRT", correction='border', nsim=99)

plot(listings19_vs_MRT_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

iv. listings19 vs hawker

listings19_vs_hawker_Lcross <- Lcross(listings19_vs_hawker, 
                           i="listings19", j="hawker",
                           correction='border')
plot(listings19_vs_hawker_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_hawker_Lcross.csr <- envelope(listings19_vs_hawker, Lcross, i="listings19", j="hawker", correction='border', nsim=99)

plot(listings19_vs_hawker_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

v. listings19 vs bus

listings19_vs_bus_Lcross <- Lcross(listings19_vs_bus, 
                           i="listings19", j="bus",
                           correction='border')
plot(listings19_vs_bus_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_bus_Lcross.csr <- envelope(listings19_vs_bus, Lcross, i="listings19", j="bus", correction='border', nsim=99)

plot(listings19_vs_bus_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

d. Summary: Cross K-Function and Cross L-Function

From the above Cross K-Function and Cross L-Function analysis, we can see among the 5 location factors, 2019 Airbnb listings are not independent with hotels, MRT stations, hawker centres and bus stops. These are be the factors that may bring Airbnb customers and profit so these Airbnb are located there. However, 2019 Airbnb listings are independent with tourist attractions, this may because the tourist attractions are too few to compare or Airbnb customers do not need to live near those attractions due to convenient MRT. All these observations are in accord with the patterns we see in our KDE maps of First-order analysis.

5. Section B: Impact of COVID-19

5.1 First-order Spatial Point Patterns Analysis: A. Derive kernel density maps of all Airbnb listings and Airbnb by room type as at June 2019 and June 2021

a. EDA and Geospatial data wrangling

check 2019 and 2020 room types

unique(listings19_sf$"room_type")
[1] "Private room"    "Entire home/apt" "Shared room"    
unique(listings21_sf$"room_type")
[1] "Private room"    "Entire home/apt" "Shared room"    
[4] "Hotel room"     

filter important field and convert to SpatialPointsDataFrame

listings19_sf <-listings19_sf %>%
  select(room_type)
listings21_sf <-listings21_sf %>%
  select(room_type)

listings19 <- as_Spatial(listings19_sf)
listings21 <- as_Spatial(listings21_sf)

change marked field to factor data type

listings19@data$"room_type" <- as.factor(listings19@data$"room_type")
listings21@data$"room_type" <- as.factor(listings21@data$"room_type")

peek the distribution

tmap_mode("plot")
tm_shape(sg) +
  tm_borders(alpha = 0.5) +
tm_shape(listings19) +
  tm_dots(col = 'room_type', 
          size = 0.5) +
  tm_layout(title = "2019 room_type") +
tm_facets(by="room_type")
tm_shape(sg) +
  tm_borders(alpha = 0.5) +
tm_shape(listings21) +
  tm_dots(col = 'room_type', 
          size = 0.5) +
  tm_layout(title = "2021 room_type") +
tm_facets(by="room_type")

Convert the SpatialPointsDataFrame into ppp (point pattern in spatstat) format

listings19_ppp <- as(listings19, "ppp")
listings21_ppp <- as(listings21, "ppp")

plot(listings19_ppp)
plot(listings21_ppp)

summary(listings19_ppp)
Marked planar point pattern:  8293 points
Average intensity 9.345289e-06 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Multitype:
                frequency proportion    intensity
Entire home/apt      4264 0.51416860 4.805054e-06
Private room         3582 0.43193050 4.036516e-06
Shared room           447 0.05390088 5.037193e-07

Window: rectangle = [7215.57, 44098.31] x [25166.35, 49226.35] units
                    (36880 x 24060 units)
Window area = 887399000 square units
summary(listings21_ppp)
Marked planar point pattern:  4238 points
Average intensity 5.114513e-06 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Multitype:
                frequency proportion    intensity
Entire home/apt      1817 0.42874000 2.192796e-06
Hotel room            198 0.04672015 2.389508e-07
Private room         2050 0.48371870 2.473986e-06
Shared room           173 0.04082114 2.087803e-07

Window: rectangle = [7406.99, 43337.89] x [25330, 48391.55] units
                    (35930 x 23060 units)
Window area = 828622000 square units

Avoiding duplicated spatial point event by using jittering method

listings19_ppp_jit <- rjitter(listings19_ppp, retry=TRUE, nsim=1, drop=TRUE)
listings21_ppp_jit <- rjitter(listings21_ppp, retry=TRUE, nsim=1, drop=TRUE)
any(duplicated(listings19_ppp_jit))
[1] FALSE
any(duplicated(listings21_ppp_jit))
[1] FALSE

COMBINING ppp AND owin object

listings19SG_ppp = listings19_ppp_jit[sg_owin]
listings21SG_ppp = listings21_ppp_jit[sg_owin]

plot(listings19SG_ppp)
plot(listings21SG_ppp)

b. Compute kernel density estimation using fixed bandwidth

par(mar=c(1,1,1,1))
par(mfrow=c(2,2))
# Rescale KDE values: covert the unit of measurement from meter to kilometer.
listings19SG_ppp.km <- rescale(listings19SG_ppp, 1000, "km")
#plot
kde_listings19SG_km_split <-density(split(listings19SG_ppp.km),
                              sigma=0.18,
                              edge=TRUE,
                            kernel="gaussian")
plot(kde_listings19SG_km_split)
# Rescale KDE values: covert the unit of measurement from meter to kilometer.
listings21SG_ppp.km <- rescale(listings21SG_ppp, 1000, "km")
#plot
kde_listings21SG_km_split <-density(split(listings21SG_ppp.km),
                              sigma=0.18,
                              edge=TRUE,
                            kernel="gaussian")
plot(kde_listings21SG_km_split)

intensity(listings19SG_ppp.km)
Entire home/apt    Private room     Shared room 
       5.692220        4.784029        0.597002 
intensity(listings21SG_ppp.km)
Entire home/apt      Hotel room    Private room     Shared room 
      2.4254041       0.2644438       2.7379286       0.2310545 

c. Convert KDE output into raster and display the kernel density maps on openstreetmap of Singapore

#split
kde_listings19SG_EH<-kde_listings19SG_km_split[[1]]
kde_listings19SG_PR<-kde_listings19SG_km_split[[2]]
kde_listings19SG_SR<-kde_listings19SG_km_split[[3]]

kde_listings21SG_EH<-kde_listings21SG_km_split[[1]]
kde_listings21SG_HR<-kde_listings21SG_km_split[[2]]
kde_listings21SG_PR<-kde_listings21SG_km_split[[3]]
kde_listings21SG_SR<-kde_listings21SG_km_split[[4]]

# convert KDE output into grid
gridded_kde_listings19SG_EH <- as.SpatialGridDataFrame.im(kde_listings19SG_EH)
gridded_kde_listings19SG_PR <- as.SpatialGridDataFrame.im(kde_listings19SG_PR)
gridded_kde_listings19SG_SR <- as.SpatialGridDataFrame.im(kde_listings19SG_SR)

gridded_kde_listings21SG_EH <- as.SpatialGridDataFrame.im(kde_listings21SG_EH)
gridded_kde_listings21SG_HR <- as.SpatialGridDataFrame.im(kde_listings21SG_HR)
gridded_kde_listings21SG_PR <- as.SpatialGridDataFrame.im(kde_listings21SG_PR)
gridded_kde_listings21SG_SR <- as.SpatialGridDataFrame.im(kde_listings21SG_SR)

# convert gridded output into raster
kde_listings19SG_EH_raster <- raster(gridded_kde_listings19SG_EH)
kde_listings19SG_PR_raster <- raster(gridded_kde_listings19SG_PR)
kde_listings19SG_SR_raster <- raster(gridded_kde_listings19SG_SR)

kde_listings21SG_EH_raster <- raster(gridded_kde_listings21SG_EH)
kde_listings21SG_HR_raster <- raster(gridded_kde_listings21SG_HR)
kde_listings21SG_PR_raster <- raster(gridded_kde_listings21SG_PR)
kde_listings21SG_SR_raster <- raster(gridded_kde_listings21SG_SR)

# assign projection system to raster
projection(kde_listings19SG_EH_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings19SG_PR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings19SG_SR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")

projection(kde_listings21SG_EH_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings21SG_HR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings21SG_PR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings21SG_SR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
# visualise the raster kernel density maps on openstreetmap of Singapore
tmap_mode("view")
tmap_options(check.and.fix = TRUE)

listings19SG_EH_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings19SG_EH_raster) + 
  tm_raster("v",breaks=c(0, 50, 100, 150, 200, 250, 300, 350, 400, 1000), alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2019 Entire home/apt KDE") + 
  tm_basemap('OpenStreetMap')

listings21SG_EH_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings21SG_EH_raster) + 
  tm_raster("v",breaks=c(0, 50, 100, 150, 200, 250, 300, 350, 400, 1000),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2021 Entire home/apt KDE") + 
  tm_basemap('OpenStreetMap')

listings19SG_PR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings19SG_PR_raster) + 
  tm_raster("v",breaks=c(0, 50, 100, 150, 200, 250, 300, 350, 400, 1000),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2019 Private Room KDE") + 
  tm_basemap('OpenStreetMap')

listings21SG_PR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings21SG_PR_raster) + 
  tm_raster("v",breaks=c(0, 50, 100, 150, 200, 250, 300, 350, 400, 1000),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2021 Private Room KDE") + 
  tm_basemap('OpenStreetMap')

listings19SG_SR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings19SG_SR_raster) + 
  tm_raster("v",breaks=c(0, 10, 20, 30, 40, 50, 60, 70, 80, 100),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2019 Shared Room KDE") + 
  tm_basemap('OpenStreetMap')

listings21SG_SR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings21SG_SR_raster) + 
  tm_raster("v",breaks=c(0, 10, 20, 30, 40, 50, 60, 70, 80, 100),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2021 Shared Room KDE") + 
  tm_basemap('OpenStreetMap')

listings21SG_HR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings21SG_HR_raster) + 
  tm_raster("v",breaks=c(0, 10, 20, 30, 40, 50, 60, 70, 80, 100),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2021 Hotel Room KDE") + 
  tm_basemap('OpenStreetMap')

tmap_arrange(listings19SG_EH_osmap,listings21SG_EH_osmap, listings19SG_PR_osmap, listings21SG_PR_osmap, listings19SG_SR_osmap, listings21SG_SR_osmap, listings21SG_HR_osmap, asp=2, ncol=2)
tmap_mode('plot')

5.2 Second-order Spatial Point Patterns Analysis: G-Function and F-Function

Note

For each function used, I will also perform a Complete Spatial Randomness(CSR) testing on it. The hypothesis are the same for both functions:

The hypothesis are as follows: + H0 = The distribution of that type of Airbnb are randomly distributed in Singapore. + H1= The distribution of that type of Airbnb are not randomly distributed in Singapore. + The null hypothesis will be rejected if p-value is smaller than alpha value of 0.01 (i.e. at 99% confident interval).

a. Prepare split ppp objects

listings19SG_EH <- split(listings19SG_ppp.km)[[1]]
listings19SG_PR <- split(listings19SG_ppp.km)[[2]]
listings19SG_SR <- split(listings19SG_ppp.km)[[3]]

listings21SG_EH <- split(listings21SG_ppp.km)[[1]]
listings21SG_PR <- split(listings21SG_ppp.km)[[3]]
listings21SG_SR <- split(listings21SG_ppp.km)[[4]]

b. Analysing Spatial Point Process Using G-Function

Entire Home

# 2019
G_19_EH = Gest(listings19SG_EH, correction = "border")
plot(G_19_EH, xlim=c(0,1))

#MONTE CARLO TEST WITH G-FUCNTION
G_19_EH.csr <- envelope(listings19SG_EH, Gest, nsim = 99)
plot(G_19_EH.csr, xlim=c(0,1))

* From 0km to 0.5km, the observed(black) line is above the envelop, indicating there is enough evidence to reject the null hypothesis and say that Entire Home Airbnbs resemble clustered pattern. This can also be seen as G increases rapidly at short distance above the theoretical(red) line. However, from about 0.5km onward, we noticed that the observed(black) line goes along with (in) the envelope. This means that there is not enough evidence to reject the null hypothesis and suggesting Entire Home Airbnbs resemble randomness above 0.5m.

# 2021
G_21_EH = Gest(listings21SG_EH, correction = "border")
plot(G_21_EH, xlim=c(0,1))

#MONTE CARLO TEST WITH G-FUCNTION
G_21_EH.csr <- envelope(listings21SG_EH, Gest, nsim = 99)
plot(G_21_EH.csr, xlim=c(0,1))

* Compared with 2019 Entire Home Airbnbs, similar patterns can be observed for 2021 Entire Home Airbnbs. However, for 2021 Entire Home Airbnbs, the part outside the envelop is longer, ranging from 0km to 0.8km.

Private Room

# 2019
G_19_PR = Gest(listings19SG_PR, correction = "border")
plot(G_19_PR, xlim=c(0,1))

#MONTE CARLO TEST WITH G-FUCNTION
G_19_PR.csr <- envelope(listings19SG_PR, Gest, nsim = 99)
plot(G_19_PR.csr, xlim=c(0,1))

# 2021
G_21_PR = Gest(listings21SG_PR, correction = "border")
plot(G_21_PR, xlim=c(0,1))

#MONTE CARLO TEST WITH G-FUCNTION
G_21_PR.csr <- envelope(listings21SG_PR, Gest, nsim = 99)
plot(G_21_PR.csr, xlim=c(0,1))

Shared Room

# 2019
G_19_SR = Gest(listings19SG_SR, correction = "border")
plot(G_19_SR, xlim=c(0,2))

#MONTE CARLO TEST WITH G-FUCNTION
G_19_SR.csr <- envelope(listings19SG_SR, Gest, nsim = 99)
plot(G_19_SR.csr, xlim=c(0,2))

# 2021
G_21_SR = Gest(listings21SG_SR, correction = "border")
plot(G_21_SR, xlim=c(0,2))

#MONTE CARLO TEST WITH G-FUCNTION
G_21_SR.csr <- envelope(listings21SG_SR, Gest, nsim = 99)
plot(G_21_SR.csr, xlim=c(0,2))

c. Analysing Spatial Point Process Using F-Function

Entire Home

#2019
F_19_EH = Fest(listings19SG_EH)
plot(F_19_EH, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_19_EH.csr <- envelope(listings19SG_EH, Fest, nsim = 99)
plot(F_19_EH.csr, xlim=c(0,1))

#2021
F_21_EH = Fest(listings21SG_EH)
plot(F_21_EH, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_21_EH.csr <- envelope(listings21SG_EH, Fest, nsim = 99)
plot(F_21_EH.csr, xlim=c(0,1))

Private Room

#2019
F_19_PR = Fest(listings19SG_PR)
plot(F_19_PR, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_19_PR.csr <- envelope(listings19SG_PR, Fest, nsim = 99)
plot(F_19_PR.csr, xlim=c(0,1))

#2021
F_21_PR = Fest(listings21SG_PR)
plot(F_21_PR, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_21_PR.csr <- envelope(listings21SG_PR, Fest, nsim = 99)
plot(F_21_PR.csr, xlim=c(0,1))

Shared Room

#2019
F_19_SR = Fest(listings19SG_SR)
plot(F_19_SR, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_19_SR.csr <- envelope(listings19SG_SR, Fest, nsim = 99)
plot(F_19_SR.csr, xlim=c(0,1))

#2021
F_21_SR = Fest(listings21SG_SR)
plot(F_21_SR, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_21_SR.csr <- envelope(listings21SG_SR, Fest, nsim = 99)
plot(F_21_SR.csr, xlim=c(0,1))

d. Summary: G-Function and K-Function

From the G-Function and K-Function analysis above, we can see for all types of Airbnbs in both 2019 and 2021 resemble cluster patterns within a distance and gradually resemble randomness or regular pattern when the distance increases. Usually, 2021 Airbnbs have a longer distance that resemble cluster patterns than 2019 Airbnbs. This may because the tightness of clusters in 2021 is lesser than that of clusters in 2019, as can be seen in our KDE maps of First-order analysis. In general, there are fewer Airbnbs in 2021 after covid but their locations and distributions are still similar to Airbnbs in 2019.